import pickle as pkl
import pandas as pd
# One-off conversion: dump the pickled price table to CSV for the rest of
# the notebook.
# SECURITY: pickle.load can execute arbitrary code — only open trusted files.
with open(r"C:\Users\GUO\Documents\WeChat Files\wxid_73ihnide6ynu21\FileStorage\File\2023-09\stock_price.pkl", "rb") as f:
    raw_data = pkl.load(f, encoding='latin1')  # renamed: `object` shadowed the builtin
df = pd.DataFrame(raw_data)
# NOTE(review): to_csv writes the index as an extra first column by default;
# confirm the downstream read_csv expects it (otherwise pass index=False).
df.to_csv(r"D:\momentum_without_crush\stock_price.csv")
暂时缺少市值的数据,所以先直接用了等权重作为market_index的计算方式
import pandas as pd
import numpy as np
# Read the data and log-transform it
data_path = r"D:\momentum_without_crush\stock_price.csv"  # Replace this with your file path
df = pd.read_csv(data_path, parse_dates=['trade_dt'])
# Log prices, indexed by trading date.
# NOTE(review): if the CSV was written with a pandas index column
# ("Unnamed: 0"), that column is still present here and gets log-transformed
# and averaged too — verify the column set.
log_df = np.log(df.set_index('trade_dt'))
# Calculate the market index as the equally-weighted mean of all assets
# (no market-cap data yet, so equal weights stand in for value weights).
market_index = log_df.mean(axis=1).values
已知$\quad \pi_0=1, \quad \pi_s=\frac{(-1)^s}{s!} \prod_{i=0}^{s-1}(d-i), \quad \text { for } \quad s>0$ 利用FFT计算分数差分 $\tilde{p}_{t, i}=\sum_{s=0}^{\infty} \pi_s p_{t-s, i}$
from decimal import Decimal, getcontext
from pyecharts.charts import Bar, Grid, Line,Scatter, Timeline, Kline, HeatMap
from pyecharts import options as opts
# 50 significant digits for the Decimal weight recursion below.
getcontext().prec = 50
def calculate_weights_decimal(d, max_lag=100000):
    """Return the fractional-differencing weights pi_s for s = 0 .. max_lag-1.

    Uses the recurrence pi_0 = 1, pi_k = -pi_{k-1} * (d - k + 1) / k,
    evaluated entirely in Decimal arithmetic (getcontext().prec digits).

    Parameters
    ----------
    d : float
        Fractional differencing order.
    max_lag : int
        Number of weights to return.

    Returns
    -------
    list of Decimal, length max_lag.
    """
    # Convert d once, via str(), so Decimal sees the decimal literal
    # (e.g. 0.1) rather than its binary-float representation.
    d_dec = Decimal(str(d))
    weights = [Decimal(1)]
    for k in range(1, max_lag):
        # BUG FIX: the ratio was previously computed in float arithmetic
        # ((d - k + 1) / k) and only then wrapped in Decimal, which silently
        # discarded the 50-digit precision context on every step.
        weights.append(-weights[-1] * (d_dec - k + 1) / k)
    return weights
# Differencing orders to examine, from no differencing (d = 0) to a full
# first difference (d = 1). Kept as literals: str(d) feeds filenames later.
d_values = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
# One weight sequence per order, long enough to cover the whole series.
pi_s_results_decimal = {
    order: calculate_weights_decimal(order, len(log_df)) for order in d_values
}
def fft_fractional_differencing(series, pi_s):
    """Fractionally difference `series` by linear convolution with the
    weight sequence `pi_s`, computed via FFT; returns the real part of the
    first len(series) coefficients."""
    out_len = len(series) + len(pi_s) - 1
    # fft(..., n=out_len) zero-pads both operands, so the pointwise product
    # of the spectra equals the full linear (not circular) convolution.
    spectrum = np.fft.fft(series, out_len) * np.fft.fft(pi_s, out_len)
    convolved = np.fft.ifft(spectrum)
    # Imaginary parts are numerical noise; keep only the valid prefix.
    return np.real(convolved[:len(series)])
# Apply each order's weight sequence to the market index via FFT convolution.
differenced_market_index_dict_fft_decimal = {}
for order in d_values:
    weights_float = np.array(pi_s_results_decimal[order], dtype=float)
    differenced_market_index_dict_fft_decimal[order] = fft_fractional_differencing(
        market_index, weights_float
    )
展示了系数$\pi_s$的具体数值,我们可以看到在$d=0$和$d=1$时与不做差分处理/传统一阶差分的特性
max_lag_plot = 100
# (label, weights-as-floats) pairs for charting pi_s at each order d.
pi_s_plot_data = [
    (
        f'd={d}',
        [float(w) for w in calculate_weights_decimal(d, max_lag=max_lag_plot)],
    )
    for d in d_values
]
# Chart the weight sequences pi_s against lag s for every order d.
pi_s_line_chart = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme='dark'))
# BUG FIX: each weights list holds max_lag_plot points (s = 0 .. max_lag_plot-1),
# so the category axis must have the same length; range(max_lag_plot + 1)
# produced one extra category and misaligned every series by the tail.
pi_s_line_chart.add_xaxis(list(range(max_lag_plot)))
for name, y_values in pi_s_plot_data:
    pi_s_line_chart.add_yaxis(
        name,
        y_values,
        label_opts=opts.LabelOpts(is_show=False),
        symbol="none"  # hide per-point markers; 100 markers per line is noisy
    )
pi_s_line_chart.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],
)
pi_s_line_chart.render_notebook()
分数差分后的结果,d = 0为只做了对数化处理,d = 1为一阶差分,可以看到随着差分阶数更接近1,数据在保留相应记忆性的前提下平稳性更好了
# One chart series per differencing order; x axis is the trading date.
plot_data_optimized = [
    (f'd={d}', differenced_market_index_dict_fft_decimal[d].tolist())
    for d in d_values
]
date_range = df['trade_dt'].astype(str).tolist()
line_chart = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme='dark'))
line_chart.add_xaxis(date_range)
for series_name, series_values in plot_data_optimized:
    line_chart.add_yaxis(
        series_name,
        series_values,
        label_opts=opts.LabelOpts(is_show=False),
    )
line_chart.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
)
# Render the chart
line_chart.render_notebook()
we express the predicted return as a weighted sum of all available past log-prices $\widehat{r}_{T+1}=\sum_{u=1}^T w_u p_u$, where: $$ w_u= \begin{cases}\left(\frac{1}{\tau} \sum_{s=T-u-\tau+1}^{T-u} \pi_s\right)-\pi_{T-u+1}, & \text { for } u=1, \ldots, T-\tau \\ \left(\frac{1}{\tau} \sum_{s=0}^{T-u} \pi_s\right)-\pi_{T-u+1}, & \text { for } u=T-\tau+1, \ldots, T-1 \\ \frac{\pi_0}{\tau}-\pi_1-1, & \text { for } u=T\end{cases} $$
Therefore, allows us to approximate the fractional momentum as: $$ \widehat{r}_{T+1} \approx-\frac{w_{T-\tau}}{\tau-1} \sum_{u=T-\tau+1}^{T-1}\left(p_u-p_{T-\tau}\right)+\sum_{u=T-\tau+1}^{T-1} \widetilde{w}_u\left(p_u-p_T\right), $$ where $\widetilde{w}_u=w_u+\left(w_{T-\tau}\right) /(\tau-1)$.
作者核心思路上认为分数动量被分成了两个部分,$\sum_{u=T-\tau+1}^{T-1}\left(p_u-p_{T-\tau}\right)$代表的动量信号,$\sum_{u=T-\tau+1}^{T-1}\left(p_u-p_T\right)$代表的反转信号,并且在后续针对系数$\widetilde{w}_u, w_u$的计算中,注意到动量信号的权重为正,反转信号的权重随着滞后的增加而迅速下降。
def calculate_w_u_modified(T, tau, pi_s):
    """Compute the past-price weights w_u (u = 1..T) of the predicted return
    r_hat_{T+1} = sum_u w_u * p_u from the pi_s weight sequence.

    Parameters
    ----------
    T : int
        Length of the price series.
    tau : int
        Averaging window length.
    pi_s : sequence of Decimal
        Fractional-differencing weights pi_0, pi_1, ...

    Returns
    -------
    np.ndarray of shape (T,), where w_u_values[u-1] holds w_u.
    """
    pi_s = np.array([float(x) for x in pi_s])  # Convert Decimal to float
    w_u_values = np.zeros(T)
    # Case u = 1 .. T-tau: moving average of tau pi's minus one further pi.
    # NOTE(review): the displayed formula sums pi_s for s = T-u-tau+1 .. T-u
    # and subtracts pi_{T-u+1}; this slice sums s = T-u-tau .. T-u-1 and
    # subtracts pi_{T-u} — a one-index shift. Confirm which convention the
    # paper intends.
    for u in range(1, T - tau + 1):
        w_u_values[u-1] = (1 / tau) * np.sum(pi_s[T-u-tau : T-u]) - pi_s[T-u]
    # Case u = T-tau+1 .. T-1: the averaging window is truncated at s = 0.
    for u in range(T - tau + 1, T):
        w_u_values[u-1] = (1 / tau) * np.sum(pi_s[:T-u]) - pi_s[T-u]
    # Case u = T: w_T = pi_0/tau - pi_1 - 1 (matches the displayed formula).
    w_u_values[-1] = pi_s[0] / tau - pi_s[1] - 1
    return w_u_values
# Series length and lookback window, then one w_u vector per order d.
T = len(log_df)
tau = 250
w_u_results_direct_modified = {
    order: calculate_w_u_modified(T, tau, pi_s_results_decimal[order])
    for order in d_values
}
这里给出了衡量过去价格影响 $\widehat{r}_{T+1}=\sum_{u=1}^T w_u p_u$的系数$w_u$值的变化,
# Plot the final tau weights w_u for each order d.
date_range = range(T - tau, T)
line_chart = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme='dark'))
line_chart.add_xaxis(date_range)
for order, weights in w_u_results_direct_modified.items():
    line_chart.add_yaxis(
        str(order),
        weights[T - tau:],
        label_opts=opts.LabelOpts(is_show=False),
    )
line_chart.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
)
line_chart.render_notebook()
类似的给出了$\widetilde{w}_u$的系数变化,我们可以注意到其确实有衰减效应,这和我们对反转效应是一个相对短期影响的认识相符合
def calculate_tilde_w_u(T, tau, pi_s_values):
    """Shift the w_u weights into tilde_w_u = w_u + w_{T-tau} / (tau - 1),
    the reversal-signal coefficients of the approximated fractional momentum.

    Returns a plain list of floats of length T.
    """
    w_u_values = calculate_w_u_modified(T, tau, pi_s_values)
    # NOTE(review): w_u_values[u-1] stores w_u, so w_{T-tau} lives at index
    # T-tau-1; index T-tau actually picks w_{T-tau+1}. Confirm the intended
    # index before relying on downstream numbers.
    w_T_minus_tau = w_u_values[T-tau]
    tilde_w_u_values = [w_u + w_T_minus_tau / (tau - 1) for w_u in w_u_values]
    return tilde_w_u_values
# tilde-w weight vectors for every differencing order.
tilde_w_u_results = {
    order: calculate_tilde_w_u(T, tau, pi_s_results_decimal[order])
    for order in d_values
}
# Chart tilde-w for each order d over the final window of dates.
line_chart_tilde = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme='dark'))
line_chart_tilde.add_xaxis(date_range)
for order, tilde_weights in tilde_w_u_results.items():
    line_chart_tilde.add_yaxis(
        str(order),
        tilde_weights[T - tau: T - 1],
        label_opts=opts.LabelOpts(is_show=False),
    )
line_chart_tilde.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
)
line_chart_tilde.render_notebook()
计算$\widehat{r}_{t}$,也即我们的因子 $$ \widehat{r}_{T+1} \approx-\frac{w_{T-\tau}}{\tau-1} \sum_{u=T-\tau+1}^{T-1}\left(p_u-p_{T-\tau}\right)+\sum_{u=T-\tau+1}^{T-1} \widetilde{w}_u\left(p_u-p_T\right) + w_Tp_T, $$ 后续策略将会围绕它展开,针对不同的d和每一只股票都进行了计算
import numpy as np
def calculate_r_hat_optimized(series, w_u_values, tilde_w_u_values, tau):
    """Compute the momentum/reversal factor r_hat for each time t >= tau.

    Approximates
        r_hat ≈ -(w/(tau-1)) * [momentum sum] + sum tilde_w_u (p_u - p_t)
    using a cumulative sum for the first term and a per-step dot product for
    the second.

    Parameters
    ----------
    series : np.ndarray
        Price series for one asset.
    w_u_values, tilde_w_u_values : array-like
        Weight vectors from calculate_w_u_modified / calculate_tilde_w_u.
    tau : int
        Lookback window length.

    Returns
    -------
    np.ndarray of shape (len(series),); entries before index tau stay 0.
    """
    T = len(series)
    r_hat = np.zeros(T)
    cum_sum_series = np.cumsum(series)
    # Momentum-term scale -w/(tau-1), one value per evaluation date t.
    factor_first_term = -w_u_values[tau:T] / (tau - 1)
    # cum_sum[t-1] - cum_sum[t-tau] + series[t-tau] = rolling sum of the tau
    # prices p_{t-tau} .. p_{t-1}, vectorized over t.
    # NOTE(review): confirm this matches the displayed approximation, which
    # subtracts (tau-1) * p_{t-tau} inside the momentum sum.
    sum_series_first_term = cum_sum_series[tau - 1:T - 1] - cum_sum_series[:T - tau]
    sum_series_first_term += series[:T - tau]  # add back the subtracted terms
    # Vectorized calculation for second term
    # Reversal term: tilde_w-weighted deviations from the current price p_t.
    sum_series_second_term = np.array([
        np.dot(tilde_w_u_values[t - tau + 1:t], series[t - tau + 1:t] - series[t]) for t in range(tau, T)
    ])
    r_hat[tau:T] = factor_first_term * sum_series_first_term + sum_series_second_term
    return r_hat
# Per-stock factor values for every differencing order d.
r_hat_dict_optimized = {}
# Loop over each d value
for d in d_values:
    print(f"Calculating for d = {d}...")
    w_u_values = w_u_results_direct_modified[d]
    # BUG FIX: use the tilde-w weights belonging to THIS d. The original loop
    # referenced the module-level `tilde_w_u_values` left over from the last
    # iteration of the earlier cell, so every order silently shared the
    # reversal weights of the final d.
    tilde_w_u_values = tilde_w_u_results[d]
    r_hat_dict_optimized[d] = {}
    # Loop over each stock in the dataframe
    for stock in df.columns[1:]:  # Skip the 'trade_dt' column
        prices = df[stock].dropna().values
        if len(prices) < tau:
            continue  # not enough history to cover one tau window
        # NOTE(review): `prices` are raw levels; the derivation above is in
        # log-prices p_u — confirm whether log_df should be used here.
        r_hat_values = calculate_r_hat_optimized(prices, w_u_values, tilde_w_u_values, tau)
        r_hat_dict_optimized[d][stock] = r_hat_values
Calculating for d = 0... Calculating for d = 0.1... Calculating for d = 0.2... Calculating for d = 0.3... Calculating for d = 0.4... Calculating for d = 0.5... Calculating for d = 0.6... Calculating for d = 0.7... Calculating for d = 0.8... Calculating for d = 0.9... Calculating for d = 1...
# Longest factor series across all d's and stocks (stocks with shorter
# histories produce shorter arrays).
max_length = max(max(len(arr) for arr in stock_dict.values()) for stock_dict in r_hat_dict_optimized.values())
# Pad each r_hat array with NaNs to match the maximum length
for d, stock_dict in r_hat_dict_optimized.items():
    for stock, r_hat_array in stock_dict.items():
        padded_array = np.pad(r_hat_array, (0, max_length - len(r_hat_array)), 'constant', constant_values=np.nan)
        r_hat_dict_optimized[d][stock] = padded_array
# Get the time index from log_df and pad it with NaNs to match the maximum length
# NOTE(review): if time_index is datetime64, padding with np.nan may raise or
# coerce to NaT — this only matters when max_length > len(time_index).
time_index = log_df.index.to_numpy()
padded_time_index = np.pad(time_index, (0, max_length - len(time_index)), 'constant', constant_values=np.nan)
# Save each r_hat DataFrame as a CSV, including the time column
for d, r_hat_data in r_hat_dict_optimized.items():
    r_hat_df = pd.DataFrame(r_hat_data)
    r_hat_df['Time'] = padded_time_index  # Add the time column
    r_hat_df = r_hat_df[['Time'] + [col for col in r_hat_df.columns if col != 'Time']]  # Reorder columns to put 'Time' first
    r_hat_df.to_csv(f"r_hat_values_for_d_{d}.csv", index=False)
夏普比:投资者风险每增加一单位,对应的超额收益增加多少
最大回撤:可能发生亏损的最大幅度(这个指标和策略的周期高度相关——不会真有人天天换股票还可能亏20%吧)
IC:股票在调仓周期期初排名和调仓周期期末收益排名的线性相关性(0.5)
IR:IC的多周期均值/IC的标准差,代表因子获取稳定超额收益的能力(也即通常所说的$\alpha$能力),一般以0.05为界限
rankIC:用了秩相关系数,用排名之间的相关性规避了可能单纯计算相关性带来的数值问题
t值:判断因子针对收益是否显著
import pandas as pd
import numpy as np
from scipy.stats import spearmanr
def calculate_metrics_from_capital(capital_over_time, trade_frequency, annual_trading_days=252):
    """Derive performance statistics from an equity-curve series.

    Parameters
    ----------
    capital_over_time : sequence of float
        Portfolio value at the end of each rebalancing cycle.
    trade_frequency : int
        Trading days per cycle (used to annualize).
    annual_trading_days : int
        Trading days per year; default 252.

    Returns
    -------
    pd.DataFrame with 'Indicator' / 'Value' columns.
    """
    # Calculate portfolio returns from capital_over_time
    portfolio_returns = np.diff(capital_over_time) / capital_over_time[:-1]
    # Annualization factor
    annualization_factor = annual_trading_days / trade_frequency
    # Metrics calculation
    annualized_return = np.mean(portfolio_returns) * annualization_factor
    annualized_volatility = np.std(portfolio_returns) * np.sqrt(annualization_factor)
    sharpe_ratio = annualized_return / annualized_volatility  # Assuming risk-free rate is 0
    # Drawdown relative to the running peak; reported as a negative number.
    max_accumulated = np.maximum.accumulate(capital_over_time)
    drawdowns = 1 - capital_over_time / max_accumulated
    max_drawdown = -np.max(drawdowns)
    # NOTE(review): "IC" here is the rank autocorrelation of consecutive
    # portfolio returns, not a factor-vs-forward-return IC; the ICIR and
    # rank-IC constructions below are likewise proxies — confirm they match
    # the definitions given in the write-up.
    spearman_r, _ = spearmanr(portfolio_returns[:-1], portfolio_returns[1:])
    ic = spearman_r
    icir = ic / np.std(portfolio_returns)  # Using standard ICIR formula
    t_stat = (np.mean(portfolio_returns) / np.std(portfolio_returns)) * np.sqrt(len(portfolio_returns))
    rank_ic, _ = spearmanr(portfolio_returns[:-1], np.diff(np.cumprod(1 + np.array(portfolio_returns)) - 1))
    # Fraction of cycles with a positive return.
    winning_periods = np.sum(portfolio_returns > 0)
    total_periods = len(portfolio_returns)
    win_rate = winning_periods / total_periods
    metrics = pd.DataFrame({
        'Indicator': ['Annualized Return', 'Annualized Volatility', 'Sharpe Ratio', 'Max Drawdown', 'IC', 'ICIR', 't-statistic', 'Rank IC',
                      'Win_rate'],
        'Value': [annualized_return, annualized_volatility, sharpe_ratio, max_drawdown, ic, icir, t_stat, rank_ic, win_rate]
    })
    return metrics
模拟了一个分多头/空头组的策略
首先确定调仓频率(周/月)
之后根据因子值进行排名,取前$quantile_1$作为多头组,后$quantile_2$作为空头组,多头组在周期开始时买入股票,结束时卖出;空头组在周期开始时以"借入买股票所需的钱"的方式"买入",结束时还给对方相应的股票(所以需要预期下跌才会有收益)
依据这样的多空策略框架进行操作,具体有一个script比较详细的给了每一步买什么收益率如何
# Simulated trading: rebalance every `trading_days_in_cycle` rows, going long
# the top-ranked stocks by r_hat and/or short the bottom-ranked ones.
def run_simulation(merged_data, initial_capital, strategy_mode, trading_days_in_cycle, investment_fraction, shorting_fraction):
    """Backtest a long/short factor strategy on merged r_hat + price data.

    Parameters
    ----------
    merged_data : pd.DataFrame
        Columns 'Date', '<stock>_r_hat' and '<stock>_price'.
    initial_capital : float
        Starting capital, split between legs per strategy_mode.
    strategy_mode : str
        'long', 'short', or 'both' (50/50 capital split).
    trading_days_in_cycle : int
        Holding period length in rows.
    investment_fraction, shorting_fraction : float
        Fraction of the universe taken long / short each cycle.

    Returns
    -------
    (final_capital, capital_over_time, long_capital_over_time,
     short_capital_over_time, capital_time_df)
    """
    # Capital allocation between the long and short legs.
    if strategy_mode == 'both':
        ratio_for_long = 0.5
    elif strategy_mode == 'long':
        ratio_for_long = 1.0
    elif strategy_mode == 'short':
        ratio_for_long = 0.0
    else:
        raise ValueError("Invalid strategy_mode. Choose 'long', 'short', or 'both'")
    long_capital = initial_capital * ratio_for_long
    short_capital = initial_capital - long_capital
    capital = initial_capital
    long_capital_over_time = [long_capital]
    short_capital_over_time = [short_capital]
    capital_over_time = [capital]
    time_over_time = [merged_data['Date'].iloc[0]]  # Initialize with the first date
    for i in range(0, len(merged_data), trading_days_in_cycle):
        cycle_data = merged_data.iloc[i:i+trading_days_in_cycle]
        if len(cycle_data) < trading_days_in_cycle:
            break  # drop the trailing partial cycle
        first_day_data = cycle_data.iloc[0]
        last_day_data = cycle_data.iloc[-1]
        time_over_time.append(last_day_data['Date'])  # Add the last date of this cycle
        # Rank the universe by the factor value on the cycle's first day.
        stock_r_hat_values = first_day_data.filter(like='_r_hat')
        stock_r_hat_values_sorted = stock_r_hat_values.sort_values(ascending=False)
        num_stocks_to_invest = int(len(stock_r_hat_values_sorted) * investment_fraction)
        num_stocks_to_short = int(len(stock_r_hat_values_sorted) * shorting_fraction)
        selected_stocks = stock_r_hat_values_sorted.index[:num_stocks_to_invest]
        shorted_stocks = stock_r_hat_values_sorted.index[-num_stocks_to_short:]
        # Entry / exit prices for both legs (*_r_hat column names map to *_price).
        selected_stock_prices_first_day = first_day_data[selected_stocks.str.replace('_r_hat', '_price')]
        selected_stock_prices_last_day = last_day_data[selected_stocks.str.replace('_r_hat', '_price')]
        shorted_stock_prices_first_day = first_day_data[shorted_stocks.str.replace('_r_hat', '_price')]
        shorted_stock_prices_last_day = last_day_data[shorted_stocks.str.replace('_r_hat', '_price')]
        # Equal capital per stock within each leg (0 if the leg is empty).
        amount_per_long_stock = long_capital / num_stocks_to_invest if num_stocks_to_invest > 0 else 0
        amount_per_shorted_stock = short_capital / num_stocks_to_short if num_stocks_to_short > 0 else 0
        # Long leg: buy at cycle start, sell at cycle end.
        shares_bought = amount_per_long_stock / selected_stock_prices_first_day
        amount_after_selling = shares_bought * selected_stock_prices_last_day
        long_profit_loss = amount_after_selling - amount_per_long_stock  # NOTE: computed but unused
        # Short leg: sell borrowed shares at cycle start, buy back at cycle
        # end, so profit is entry value minus cover cost.
        shares_shorted = amount_per_shorted_stock / shorted_stock_prices_first_day
        amount_after_covering = shares_shorted * shorted_stock_prices_last_day
        short_profit_loss = amount_per_shorted_stock - amount_after_covering
        long_capital = np.sum(amount_after_selling)
        short_capital += np.sum(short_profit_loss)
        capital = long_capital + short_capital
        long_capital_over_time.append(long_capital)
        short_capital_over_time.append(short_capital)
        capital_over_time.append(capital)
    final_capital = capital_over_time[-1]
    capital_time_df = pd.DataFrame({
        'Date': pd.to_datetime(time_over_time),
        'Capital': capital_over_time
    })
    return final_capital, capital_over_time, long_capital_over_time, short_capital_over_time, capital_time_df
from collections import defaultdict
# Function to calculate annual performance metrics based on capital over time
def calculate_annual_performance(capital_over_time, long_capital_over_time, short_capital_over_time, trading_days_in_cycle, start_year=2012):
    """Slice the equity curves into calendar-year chunks and compute per-year
    returns and max drawdown.

    Parameters
    ----------
    capital_over_time, long_capital_over_time, short_capital_over_time : sequence of float
        Total / long-leg / short-leg capital after each rebalancing cycle.
    trading_days_in_cycle : int
        Trading days per rebalancing cycle.
    start_year : int
        Calendar year of the first observation (was hard-coded to 2012;
        now a backward-compatible parameter with the same default).

    Returns
    -------
    (pd.DataFrame indexed by year, list of years with negative total return)
    """
    annual_performance = defaultdict(lambda: defaultdict(float))
    num_cycles_per_year = 252 // trading_days_in_cycle  # Approximate number of cycles in a trading year
    # Loop through each year and calculate performance metrics
    for year_start in range(0, len(capital_over_time), num_cycles_per_year):
        year_end = min(year_start + num_cycles_per_year, len(capital_over_time) - 1)
        year_capital_data = capital_over_time[year_start:year_end + 1]
        year_long_capital_data = long_capital_over_time[year_start:year_end + 1]
        year_short_capital_data = short_capital_over_time[year_start:year_end + 1]
        # Skip if not enough data for the year
        if len(year_capital_data) < 2:
            continue
        # Total / long / short simple returns over the year, in percent.
        annual_return = (year_capital_data[-1] / year_capital_data[0] - 1) * 100
        annual_long_return = (year_long_capital_data[-1] / year_long_capital_data[0] - 1) * 100
        annual_short_return = (year_short_capital_data[-1] / year_short_capital_data[0] - 1) * 100
        # Max drawdown versus the running peak, reported as a negative percent.
        running_max = np.maximum.accumulate(year_capital_data)
        drawdowns = 1 - (year_capital_data / running_max)
        max_drawdown = -np.max(drawdowns) * 100  # in percentage
        year = year_start // num_cycles_per_year + start_year
        annual_performance[year]['Annual Return'] = annual_return
        annual_performance[year]['Long Annual Return'] = annual_long_return
        annual_performance[year]['Short Annual Return'] = annual_short_return
        annual_performance[year]['Max Drawdown'] = max_drawdown
    # Collect loss-making years for the caller.
    negative_return_years = [year for year, metrics in annual_performance.items() if metrics['Annual Return'] < 0]
    return pd.DataFrame.from_dict(annual_performance, orient='index'), negative_return_years
import os
def merge_r_hat_and_stock_price(r_hat_folder_path, stock_price_file_path, start_date):
    """
    Merge r_hat data and stock_price data from the specified folder and file path.
    Parameters:
    - r_hat_folder_path: The folder containing CSV files of r_hat data for various d values.
    - stock_price_file_path: The file path for the stock_price data CSV file.
    - start_date: Inclusive lower bound on 'Date' (string comparison); None keeps all rows.
    Returns:
    - A dictionary where keys are the d values and values are the merged DataFrames for each d value.
    """
    stock_price_data = pd.read_csv(stock_price_file_path)
    stock_price_columns = set(stock_price_data.columns)
    merged_data_dict = {}
    for filename in os.listdir(r_hat_folder_path):
        if filename.endswith(".csv"):
            # e.g. "r_hat_values_for_d_0.1.csv" -> 0.1 (last "_" token, minus extension)
            d_value = float(filename.split("_")[-1].replace(".csv", "").replace("d", ""))
            r_hat_data = pd.read_csv(os.path.join(r_hat_folder_path, filename))
            r_hat_columns = set(r_hat_data.columns)
            # Keep only stocks present in both tables; drop the time columns here.
            common_columns = r_hat_columns.intersection(stock_price_columns)
            common_stock_columns = common_columns - {'Time', 'trade_dt'}
            filtered_r_hat_data = r_hat_data[['Time'] + list(common_stock_columns)]
            filtered_stock_price_data = stock_price_data[['trade_dt'] + list(common_stock_columns)]
            filtered_r_hat_data = filtered_r_hat_data.rename(columns={"Time": "Date"})
            filtered_stock_price_data = filtered_stock_price_data.rename(columns={"trade_dt": "Date"})
            # Inner join on Date; shared stock columns get _r_hat/_price suffixes.
            merged_data = pd.merge(filtered_r_hat_data, filtered_stock_price_data, on="Date", suffixes=('_r_hat', '_price'))
            if start_date is not None:
                merged_data = merged_data[merged_data['Date'] >= start_date]
            merged_data_dict[d_value] = merged_data
    return merged_data_dict
merged_data_dict = merge_r_hat_and_stock_price(r"C:\Users\GUO\hello\.venv\quant\momentum_without_crash", r"D:\momentum_without_crush\stock_price.csv", '2001-02-17')
# For every merged frame, shift each '_r_hat' column so its last valid value
# lines up with the last valid value of the matching '_price' column.
for key in merged_data_dict:
    df = merged_data_dict[key]
    # Correctly align the '_r_hat' columns with their corresponding '_price' columns based on the new criteria
    for col in df.columns:
        if '_r_hat' in col:
            # Find the corresponding '_price' column
            price_col = col.replace('_r_hat', '_price')
            # Check if the corresponding '_price' column exists
            if price_col in df.columns:
                # Check if '_r_hat' column is not all NaN
                if not df[col].isna().all():
                    # Find the last valid index for '_price' column
                    price_last_valid = df[price_col].last_valid_index()
                    # NOTE(review): last_valid_index() returns an index LABEL,
                    # but iloc[] below treats it as a POSITION. After the
                    # start_date filter the labels are not reset, so label and
                    # position can differ — confirm the alignment is correct.
                    if price_last_valid is not None:
                        r_hat_second_last_valid = df[col].iloc[:price_last_valid].last_valid_index() - 1
                        # Perform the shift to align
                        shift_rows = price_last_valid - r_hat_second_last_valid
                        df[col] = df[col].shift(shift_rows)
    # Write the (possibly shifted) frame back into the dict.
    merged_data_dict[key] = df
# Add the terminal term w_T * p_T to each factor column (see the formula
# above), then keep only dates from 2012-03-04 onward.
for key, data in merged_data_dict.items():
    filtered_data = data[data['Date'] >= '2012-03-04']
    r_hat_columns = [col for col in filtered_data.columns if '_r_hat' in col]
    for r_hat_col in r_hat_columns:
        price_col = r_hat_col.replace('_r_hat', '_price')
        if price_col in filtered_data.columns:
            # Vectorized adjustment over the (reversed) row index.
            # NOTE(review): `w_u_values` here is whatever remains from the
            # last loop above (the final d), so every key shares one weight
            # vector — likely unintended. Also `filtered_data` is a slice of
            # `data`, so the .loc assignment may hit SettingWithCopyWarning.
            indices = filtered_data.index[::-1]
            shifted_price = filtered_data[price_col].shift(-1).reindex(indices)
            adjusted_w_u_values = w_u_values[T - indices]
            filtered_data.loc[indices, r_hat_col] += shifted_price * adjusted_w_u_values
    merged_data_dict[key] = filtered_data
merged_data_dict[0.1].to_csv(r"D:\momentum_without_crush\merged.csv", index = False)
import warnings
# Silence RuntimeWarnings (e.g. NaN arithmetic in the simulation).
# NOTE: this is process-wide, not scoped to the cell below.
warnings.filterwarnings("ignore", category=RuntimeWarning)
def grid_search_per_d(merged_data_dict, initial_capital=1, trading_days_in_cycle=21, shorting_fraction=0.1):
    """For each differencing order d, sweep investment_fraction and keep the
    parameters maximizing final capital of a long-only simulation.

    NOTE(review): despite the name this is a 1-D sweep — shorting_fraction is
    fixed and the 'long' strategy mode ignores it entirely.

    Returns
    -------
    dict mapping d -> {'investment_fraction': ..., 'shorting_fraction': ...}
    (None for a d where no run beat the initial capital of 0).
    """
    investment_fraction_values = [0.025, 0.1, 0.2, 0.4, 0.45, 0.5]
    best_params_per_d = {}
    for d, merged_data in merged_data_dict.items():
        best_params = None
        best_final_capital = 0  # removed the unused best_negative_years tracker
        for investment_fraction in investment_fraction_values:
            final_capital, capital_over_time, long_capital_over_time, short_capital_over_time, capital_time_df = run_simulation(
                merged_data, initial_capital, 'long', trading_days_in_cycle, investment_fraction, shorting_fraction
            )
            if final_capital > best_final_capital:
                best_params = {'investment_fraction': investment_fraction, 'shorting_fraction': shorting_fraction}
                best_final_capital = final_capital
                print(f"New best params for d = {d}: {best_params} Final capital: {best_final_capital}")
        best_params_per_d[d] = best_params
    return best_params_per_d
# Run the grid search
# (sweeps investment_fraction only; shorting_fraction stays at its default)
best_params_per_d = grid_search_per_d(merged_data_dict)
print(f"Best params per d: {best_params_per_d}")
New best params for d = 0.1: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.9742772112368465
New best params for d = 0.1: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.749086124684469
New best params for d = 0.1: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.644234861006474
New best params for d = 0.2: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.9723027610897716
New best params for d = 0.2: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.7641340823950555
New best params for d = 0.2: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.694696256387405
New best params for d = 0.3: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.876025543395229
New best params for d = 0.3: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.747151252055097
New best params for d = 0.3: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.669716549783102
New best params for d = 0.4: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.8769850366903307
New best params for d = 0.4: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.78378981934201
New best params for d = 0.4: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.651222007161921
New best params for d = 0.5: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.8798148308301714
New best params for d = 0.5: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.867453454533341
New best params for d = 0.5: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.612224528010142
New best params for d = 0.6: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.860321132257489
New best params for d = 0.6: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.940407643837893
New best params for d = 0.6: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.676211523812558
New best params for d = 0.7: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.892761680099581
New best params for d = 0.7: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.923533858267651
New best params for d = 0.7: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.762687559228131
New best params for d = 0.8: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.9792340808396216
New best params for d = 0.8: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 5.071500254321456
New best params for d = 0.8: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.786275684976067
New best params for d = 0.9: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 4.012778335260196
New best params for d = 0.9: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 5.117874611179349
New best params for d = 0.9: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.83048616722581
New best params for d = 0.0: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 11.233726414297216
New best params for d = 0.0: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 11.26001005277254
New best params for d = 0.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 11.952328261125679
New best params for d = 1.0: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 10.090359309849847
New best params for d = 1.0: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 11.319795698828981
New best params for d = 1.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 12.051178843402448
Best params per d: {0.1: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.2: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.3: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.4: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.5: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.6: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.7: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.8: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.9: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 1.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}}
initial_capital = 1
trading_days_in_cycle = 21  # roughly monthly rebalancing
# Long-only run on d = 0 with a very small top fraction (0.6% of the universe).
final_capital, capital_over_time, long_capital_over_time, short_capital_over_time, capital_time_df = run_simulation(
    merged_data_dict[0], initial_capital, 'long', trading_days_in_cycle,0.006,0.1)
final_capital
4.400402015355775
# Performance statistics for the simulated equity curve above.
metrics = calculate_metrics_from_capital(capital_over_time, trading_days_in_cycle , annual_trading_days=252)
metrics
| Indicator | Value | |
|---|---|---|
| 0 | Annualized Return | 0.177262 |
| 1 | Annualized Volatility | 0.285742 |
| 2 | Sharpe Ratio | 0.620356 |
| 3 | Max Drawdown | -0.480625 |
| 4 | IC | 0.049569 |
| 5 | ICIR | 0.600930 |
| 6 | t-statistic | 2.065267 |
| 7 | Rank IC | 0.091733 |
| 8 | Win_rate | 0.593985 |
# Year-by-year returns and drawdowns for the same run.
annual_performance_sample = calculate_annual_performance(capital_over_time,long_capital_over_time, short_capital_over_time, trading_days_in_cycle)
annual_performance_sample
( Annual Return Long Annual Return Short Annual Return Max Drawdown 2012 -6.596295 -6.596295 NaN -16.398647 2013 16.660401 16.660401 NaN -16.503268 2014 113.283983 113.283983 NaN -6.177381 2015 -8.976034 -8.976034 NaN -47.555449 2016 5.181663 5.181663 NaN -12.990307 2017 4.205690 4.205690 NaN -18.069958 2018 -15.720422 -15.720422 NaN -30.008132 2019 34.590760 34.590760 NaN -3.318545 2020 24.020836 24.020836 NaN -7.447464 2021 35.662192 35.662192 NaN -17.262459 2022 10.827545 10.827545 NaN -9.568065 2023 -10.273541 -10.273541 NaN -10.273541, [2012, 2015, 2018, 2023])
import pandas as pd
# Benchmark: CSI 300 index levels from a Wind export.
hs300_data = pd.read_excel(r"D:\wind\沪深300指数.xlsx")
hs300_data.columns = ['Date', '沪深300指数']
hs300_data['Date'] = pd.to_datetime(hs300_data['Date'])
# Inner-join strategy equity with the benchmark on rebalancing dates.
merged_df = pd.merge(capital_time_df, hs300_data, on='Date', how='inner')
# Normalize the benchmark to 1 at the first common date.
merged_df['Normalized_沪深300指数'] = merged_df['沪深300指数'] / merged_df['沪深300指数'].iloc[0]
# NOTE(review): "excess return" here is the RATIO of strategy equity to the
# normalized benchmark, not a return difference — confirm that is intended.
merged_df['超额收益'] = merged_df['Capital'] / merged_df['Normalized_沪深300指数']
merged_df
| Date | Capital | 沪深300指数 | Normalized_沪深300指数 | 超额收益 | |
|---|---|---|---|---|---|
| 0 | 2012-03-05 | 1.000000 | 2662.6980 | 1.000000 | 1.000000 |
| 1 | 2012-04-05 | 0.923819 | 2512.8320 | 0.943716 | 0.978916 |
| 2 | 2012-05-08 | 0.967525 | 2709.1160 | 1.017433 | 0.950948 |
| 3 | 2012-06-06 | 0.941296 | 2557.4010 | 0.960455 | 0.980053 |
| 4 | 2012-07-06 | 0.926490 | 2472.6140 | 0.928612 | 0.997715 |
| ... | ... | ... | ... | ... | ... |
| 129 | 2023-04-24 | 5.087475 | 3982.6429 | 1.495717 | 3.401362 |
| 130 | 2023-05-26 | 5.091094 | 3850.9511 | 1.446259 | 3.520181 |
| 131 | 2023-06-28 | 5.081716 | 3840.7986 | 1.442446 | 3.522985 |
| 132 | 2023-07-27 | 4.904241 | 3902.3458 | 1.465561 | 3.346324 |
| 133 | 2023-08-25 | 4.400402 | 3709.1517 | 1.393005 | 3.158928 |
134 rows × 5 columns
# Compare strategy equity, the normalized HS300 benchmark, and their ratio.
dates = merged_df['Date'].astype(str).tolist()
capital = merged_df['Capital'].tolist()
hs300 = merged_df['Normalized_沪深300指数'].tolist()
compared = merged_df['超额收益'].tolist()
line = Line(init_opts=opts.InitOpts(theme='dark'))
line.add_xaxis(dates)
# One series per curve, markers' labels hidden.
for label, values in (
    ("Strategy Capital", capital),
    ("沪深300指数", hs300),
    ("超额收益", compared),
):
    line.add_yaxis(label, values, label_opts=opts.LabelOpts(is_show=False))
line.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=0, range_end=100)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
)
line.render_notebook()
line.render()  # also writes render.html to disk
'c:\\Users\\GUO\\hello\\.venv\\quant\\momentum_without_crash\\render.html'